Project #03: Visualizing and Maintaining the Green Canopy of NYC

Introduction

This project will explore the NYC TreeMap data set in the districts, and creating visualizations of NYC’s tree density, dead fractions, species, and proximity.

Based on this data and visualizations, I will propose a new program for the NYC Parks Department to replace dead trees with new ones. This in an effort to support both environmental resilience and community well-being for New Yorkers.

Data Acquisition

Retrieve boundaries of districts

Code
library(sf)
Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
Code
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.0     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Code
library(httr2)
library(dplyr)
library(tmap)
library(units)
udunits database from /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/units/share/udunits/udunits2.xml
Code
library(gt)

Attaching package: 'gt'

The following object is masked from 'package:tmap':

    metro
Code
library(ggplot2)
library(httr2)
library(fs)
library(leaflet)
library(htmltools)


get_council_districts <- function() {
  # api_url for NYC Council Districts
  api_url <- "https://services5.arcgis.com/GfwWNkhOj9bNBqoJ/arcgis/rest/services/NYC_City_Council_Districts/FeatureServer/0/query"
  geojson_file <- "data/mp03/council_districts_arcgis.geojson"

  # Download only if file doesn't exist
  if (!file.exists(geojson_file)) {
    resp <- request(api_url) %>%
      req_url_query(
        where = "1=1",
        outFields = "*",
        returnGeometry = "true",
        f = "geojson"
      ) %>%
      req_headers(`User-Agent` = "Mozilla/5.0 (Educational Project)") %>%
      req_perform()

    writeBin(resp_body_raw(resp), geojson_file)
  }

  # Read GeoJSON quietly
  districts <- st_read(geojson_file, quiet = TRUE)

  # Verify geometries
  if (all(st_is_empty(districts))) {
    stop("ERROR: Downloaded data has empty geometries!")
  }

  # Transform to WGS84
  districts <- st_transform(districts, crs = "WGS84")

  return(districts)
}

districts <- get_council_districts()

Download Tree Points

Code
get_tree_points <- function(limit = 50000) {
  # Base URL for the NYC OpenData Tree Points API (Socrata)
  base_url <- "https://data.cityofnewyork.us/resource/hn5i-inap.geojson"

  # Initialize pagination variables
  offset <- 0
  all_files <- list()
  page_num <- 1

  repeat {
    filename <- file.path("data/mp03", sprintf("trees_page_%03d.geojson", page_num))

    # Download only if file does not exist
    if (!file.exists(filename)) {
      resp <- request(base_url) %>%
        req_url_query(`$limit` = limit, `$offset` = offset) %>%
        req_headers(`User-Agent` = "Mozilla/5.0 (Educational Project)") %>%
        req_retry(max_tries = 3) %>%
        req_perform()

      writeBin(resp_body_raw(resp), filename)
    }

    # Add filename to list
    all_files[[page_num]] <- filename

    # Read current file to check number of records
    current_data <- st_read(filename, quiet = TRUE)
    n_records <- nrow(current_data)

    # Stop if fewer than limit (end of dataset)
    if (n_records < limit) break

    # Prepare for next page
    offset <- offset + limit
    page_num <- page_num + 1
    Sys.sleep(0.5)
  }

  # Read and combine all files quietly
  tree_list <- lapply(all_files, function(f) st_read(f, quiet = TRUE))
  trees <- bind_rows(tree_list)

  return(trees)
}

# Load all tree points
trees <- get_tree_points()

Data Integration and Initial Exploration

Mapping NYC Trees

Here we’re mapping NYC trees by districts

Code
trees_to_plot <- trees

# Plot
ggplot() +
  geom_sf(data = trees_to_plot, color = "#228B22", alpha = 0.3, size = 0.03) + # trees first
  geom_sf(data = districts, fill = NA, color = "darkblue", linewidth = 0.3) +    # districts on top
  coord_sf(expand = FALSE) +
  labs(
    title = "NYC Tree Points",
    subtitle = paste("All trees included:", format(nrow(trees_to_plot), big.mark = ",")),
    x = NULL, y = NULL
  ) +
  theme_minimal(base_size = 11) +
  theme(panel.grid.major = element_line(linewidth = 0.2, colour = "grey90"))

Extra Credit (Interactive Tree Map Visualization)

Interactive Map for NYC Tree Density

Code
# ---- 1. normalize district columns & add borough ----
districts_named <- districts %>%
  mutate(
    council_district = as.integer(as.character(CounDist)),
    borough = case_when(
      CounDist >= 1  & CounDist <= 10 ~ "Manhattan",
      CounDist >= 11 & CounDist <= 18 ~ "Bronx",
      CounDist >= 19 & CounDist <= 32 ~ "Queens",
      CounDist >= 33 & CounDist <= 48 ~ "Brooklyn",
      CounDist >= 49 & CounDist <= 51 ~ "Staten Island",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(borough))

# ---- 2. prepare trees ----
trees_clean <- trees %>%
  select(genusspecies, planteddate, geometry) %>%
  st_as_sf()

# ---- 3. ensure same CRS ----
districts_named <- st_transform(districts_named, 4326)
trees_clean      <- st_transform(trees_clean, 4326)

# ---- 4. join trees -> district (point-in-polygon) and count per district ----
trees_with_districts <- st_join(trees_clean, districts_named, join = st_within)

trees_per_district <- trees_with_districts %>%
  st_drop_geometry() %>%
  count(CounDist, name = "trees")

districts_density <- districts_named %>%
  mutate(
    area_km2 = as.numeric(Shape__Area) / 1e6
  ) %>%
  left_join(trees_per_district, by = "CounDist") %>%
  mutate(
    trees = ifelse(is.na(trees), 0L, trees),
    density = trees / area_km2
  )

# ---- 6. prepare popups ----
districts_density$popup <- paste0(
  "<b>Borough:</b> ", districts_density$borough, "<br>",
  "<b>District:</b> ", districts_density$CounDist, "<br>",
  "<b>Trees:</b> ", format(districts_density$trees, big.mark = ","), "<br>",
  "<b>Area:</b> ", round(districts_density$area_km2, 2), " km²<br>",
  "<b>Density:</b> ", round(districts_density$density, 1), " trees/km²"
)

# ---- 7. prepare palette ----
vmax <- ceiling(max(districts_density$density, na.rm = TRUE))
pal <- colorNumeric("YlGnBu", domain = c(0, vmax), na.color = "transparent")

# ---- 8. borough outline ----
borough_outline <- districts_named %>%
  st_make_valid() %>%
  group_by(borough) %>%
  summarise(.groups = "drop") %>%
  st_cast("MULTIPOLYGON", warn = FALSE) %>%
  st_transform(4326)

# ---- 9. sample trees for point layer ----
sample_size <- 200
trees_to_plot <- trees_clean %>% slice_sample(n = min(sample_size, nrow(trees_clean)))

# ---- 10. build leaflet map ----
leaflet(districts_density) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(
    fillColor   = ~pal(density),
    color       = "white",
    weight      = 1,
    fillOpacity = 0.85,
    popup       = ~popup,
    label       = ~paste0(
      "District ", CounDist, "\n",
      "Trees: ", format(trees, big.mark = ","), "\n",
      "Density: ", round(density, 1), " trees/km²"
    ),
    labelOptions = labelOptions(
      style = list("font-weight" = "bold", "padding" = "3px 6px"),
      textsize = "12px",
      direction = "auto"
    )
  ) %>%
  addPolylines(
    data = borough_outline,
    color = "black",
    weight = 1.25,
    opacity = 0.7
  ) %>%
  addCircleMarkers(
    data = trees_to_plot,
    radius = 1,
    color = "#2b8cbe",
    fillOpacity = 0.6,
    stroke = FALSE,
    popup = ~paste0("<b>Species:</b> ", genusspecies,
                    "<br><b>Planted:</b> ", planteddate)
  ) %>%
  addLegend(
    position = "bottomright",
    pal      = pal,
    values   = c(0, vmax),
    title    = "Tree density (trees/km²)",
    opacity  = 1
  ) %>%
  addControl("<b> Tree Density — NYC Districts</b>", position = "topright") %>%
  setView(lng = -73.94, lat = 40.70, zoom = 11)

District Analyses of Trees

Here I’m joining the districts and trees together to do more analysis. This includes which district has the most trees, highest density, highest fraction of dead trees, and most common tree species. I will be using interactive data visualizations for this as well.

Code
trees_per_district <- trees_with_districts %>%
  st_drop_geometry() %>%
  count(CounDist, name = "trees")

top_district <- trees_per_district %>%
  arrange(desc(trees)) %>%
  slice(1)

# Create gt table and highlight District 51 
top_district %>%
  gt() %>%
  tab_style(
    style = cell_fill(color = "lightblue"),
    locations = cells_body(
      columns = c("CounDist"),
      rows = CounDist == 51
    )
  ) %>%
  cols_label(
    CounDist = "CounDist",
    trees    = "Number of Trees"
  ) %>%
  tab_header(
    title = "NYC District with the Most Trees",
    subtitle = paste("Total trees in this district:", top_district$Trees)
  )
NYC District with the Most Trees
Total trees in this district:
CounDist Number of Trees
51 70928

1. Which council district has the most trees?

Code
# Get top district by number of trees
top_district <- trees_per_district %>%
  slice_max(trees, n = 1)

# Create gt table and highlight District 51
top_district %>%
  gt() %>%
  tab_style(
    style = cell_fill(color = "lightblue"),
    locations = cells_body(
      columns = c("CounDist"),
      rows = CounDist == 51
    )
  ) %>%
  cols_label(
    CounDist = "CounDist",
    trees    = "Number of Trees"
  ) %>%
  tab_header(
    title = "NYC District with the Most Trees",
    subtitle = paste("Total trees in this district:", top_district$Trees)
  )
NYC District with the Most Trees
Total trees in this district:
CounDist Number of Trees
51 70928
NoteKey Finding

District 1 is the one with the most trees.

2. Which council district has the highest density of trees?

How concentrated trees are, providing an idea of green coverage beyond tree counts.

Code
# 2. Summarize tree counts per district

trees_density <- trees_per_district %>% left_join( districts %>% st_set_geometry(NULL) %>% select(CounDist, Shape__Area), by = "CounDist" ) %>% mutate(Tree_Density = trees / Shape__Area) %>% arrange(desc(Tree_Density))


trees_density %>%
  slice_head(n = 3) %>%  
  select(CounDist, Tree_Density) %>%
  gt() %>%
  tab_header(
    title = "Top 3 NYC Districts with Highest Tree Density"
  ) %>%
  cols_label(
    CounDist = "CounDist",
    Tree_Density = "Tree Density"
  ) %>%
  fmt_number(
    columns = "Tree_Density",
    decimals = 4  
  ) %>%
  tab_style(
    style = cell_fill(color = "lightblue"),
    locations = cells_body(
      columns = everything(),
      rows = CounDist %in% c(7, 39)
    )
  )
Top 3 NYC Districts with Highest Tree Density
CounDist Tree Density
7 0.0003
39 0.0003
2 0.0002
NoteKey Finding

The council district with the highest tree density is 7 and 39.

3. Which district has highest fraction of dead trees out of all trees? Examining tree conditions across districts to assess tree health where it’s needed.

Code
trees_districts <- st_join(trees, districts, join = st_intersects, left = TRUE)


# Summarize dead fraction per district
dead_fraction <- trees_districts %>%
  st_set_geometry(NULL) %>%  # drop geometry
  group_by(CounDist) %>% # group by district as character
  summarise(
    Total_Trees = n(),
    Dead_Trees  = sum(tpcondition == "Dead", na.rm = TRUE),
    Dead_Fraction = Dead_Trees / Total_Trees
  ) %>%
  arrange(desc(Dead_Trees))

# Get top 5 districts by dead fraction
top_districts <- dead_fraction %>%
  slice_head(n = 5)

# Identify the top district for highlighting
top_district <- top_districts$CounDist[1]

# Create the table
library(gt)

top_districts %>%
  select(CounDist, Total_Trees, Dead_Trees, Dead_Fraction) %>%
  gt() %>%
  cols_label(
    CounDist  = "Council District",
    Total_Trees   = "Total Trees",
    Dead_Trees    = "Dead Trees",
    Dead_Fraction = "Dead Fraction"
  ) %>%
  fmt_number(
    columns = c(Dead_Fraction),
    decimals = 2
  ) %>%
  tab_header(
    title = "Top 5 NYC Council Districts by Dead Tree Fraction"
  ) %>%
  tab_style(
    style = cell_fill(color = "lightblue"),
    locations = cells_body(
      rows = CounDist == top_district
    )
  )
Top 5 NYC Council Districts by Dead Tree Fraction
Council District Total Trees Dead Trees Dead Fraction
51 70928 9147 0.13
50 52438 7041 0.13
19 49832 6335 0.13
23 44807 5828 0.13
49 35027 4569 0.13
NoteKey Finding

The district with the highest fraction of dead trees is 51

4. What is the most common tree species in Manhattan?

Which species dominate in Manhattan, prevailing patterns of diodiversity in this area.

Code
# Add a borough column
trees_districts <- trees_districts %>%
  mutate(
    Borough = case_when(
      CounDist >= 1  & CounDist <= 10 ~ "Manhattan",
      CounDist >= 11 & CounDist <= 18 ~ "Bronx",
      CounDist >= 19 & CounDist <= 32 ~ "Queens",
      CounDist >= 33 & CounDist <= 48 ~ "Brooklyn",
      CounDist >= 49 & CounDist <= 51 ~ "Staten Island",
      TRUE ~ "Other"
    )
  )

# Create most_common_manhattan table
most_common_manhattan <- trees_districts %>%
  filter(Borough == "Manhattan") %>%
  st_set_geometry(NULL) %>%  # drop geometry for table
  group_by(genusspecies) %>%
  summarise(n = n()) %>%
  arrange(desc(n)) %>%
  slice_head(n = 1)  # only top species

# Now display as a gt table
most_common_manhattan %>%
  gt() %>%
  cols_label(
    genusspecies = "Tree Species",
    n = "Number of Trees"
  ) %>%
  tab_header(
    title = "Most Common Tree Species in Manhattan"
  )
Most Common Tree Species in Manhattan
Tree Species Number of Trees
Gleditsia triacanthos var. inermis - Thornless honeylocust 17311
NoteKey Finding

The species in manhattan with most trees is Gleditsia triacanthos var. inermis - Thornless honeylocust.

5. What is the species of the tree closest to Baruch’s campus?

Map of Closest Tree to Baruch’s Campus

Code
#| eval: false

# --- Function to create a point ---
new_st_point <- function(lat, lon) {
  st_sfc(st_point(c(lon, lat))) |>  # longitude first
    st_set_crs("WGS84")
}

# --- Baruch College point ---
baruch <- new_st_point(lat = 40.7401, lon = -73.9832)

# --- Project to planar CRS for distance calculations ---
trees_proj <- st_transform(trees, 2263)
baruch_proj <- st_transform(baruch, 2263)

# --- Compute distances in meters ---
trees_distance <- trees_proj %>%
  mutate(distance_m = as.numeric(st_distance(geometry, baruch_proj)))

# --- Select closest tree and 500 nearest trees ---
closest_tree <- trees_distance %>% slice_min(distance_m, n = 1)
n_keep <- min(500, nrow(trees_distance))
closest_500 <- trees_distance %>% slice_min(distance_m, n = n_keep)

# --- Create 100 m buffer around Baruch ---
baruch_buf <- st_buffer(baruch_proj, 100)

# --- Transform back to WGS84 for mapping ---
closest_500 <- st_transform(closest_500, 4326)
closest_tree <- st_transform(closest_tree, 4326)
baruch <- st_transform(baruch, 4326)
baruch_buf <- st_transform(baruch_buf, 4326)

# --- Build tooltip text for the closest tree ---
closest_tree$tooltip <- paste0(
  "<b>Species:</b> ", closest_tree$genusspecies, "<br>",
  "<b>Distance:</b> ", round(closest_tree$distance_m, 1), " m"
)

# --- tmap interactive ---
tmap_mode("view")
ℹ tmap modes "plot" - "view"
ℹ toggle with `tmap::ttm()`
Code
tm_shape(closest_500) +
  tm_dots(size = 0.05, fill = "darkgreen", fill_alpha = 0.7, fill.legend = tm_legend_hide()) +  # small tree points
tm_shape(closest_tree) +
  tm_symbols(size = 1, shape = 21, fill = "green", col = "red",
             popup.vars = c("genusspecies","distance_m"), 
             id = "tooltip") +  # highlighted closest tree
tm_shape(baruch_buf) +
  tm_borders(col = "red", lwd = 2) +  # buffer
tm_shape(baruch) +
  tm_symbols(shape = 23, fill = "blue", size = 1) +  # Baruch marker
tm_basemap("OpenStreetMap") +
tm_view(set_view = c(st_coordinates(baruch)[1,1], st_coordinates(baruch)[1,2], 19))  
NoteKey Finding

The tree closest to Baruch College is a Quercus acutissima - sawtooth oak, located approximately 100.5 meters away.

NYC Parks Proposal

City Council Propose Additional Funding - Replacing Dead Trees in District 32

Overview

This initiative focuses on replacing dead trees and increasing the green space density in District 32. The goal is to enhance the neighborhood’s appearance, give the area a transformation to attract more visitors and residents, as well as supporting community well-being.

The project will contribute to a larger effort to improve Rockaway’s public spaces, making it a more attractive destination for beach-goers and creating a healthier, greener environment for residents throughout the year.

District Selection

A comparative analysis across districts shows that District 32 has a significant high dead-tree share (14% of it’s total tree population). Focusing on giving resources to replacing trees can make a great impact on the affected neighborhoods.

Scope and Goals

  • Replace 100 dead trees to reduce hazards and make space for healthy trees
  • Plant 200 new trees, with a mix of tree species to increase green space density and biodiversity
  • Planting new trees will additionally support keeping streets cooler during summer heat waves, filter air pollutants to public health, manage stormwater to reduce flood risk, and boosting property values.

Reducing air pollution and supporting public health

Managing stormwater and lowering flood risk

Improving neighborhood aesthetics and boosting property values

Supporting Visuals

As seen below in the comparison chart, District 32 has as of today, the highest dead-tree fractions in the city, making it a priority for target action.

Code
# Summarize dead trees per district
dead_fraction <- trees_districts %>%
  st_set_geometry(NULL) %>%                  
  mutate(CounDist = as.character(CounDist)) %>% 
  group_by(CounDist) %>%
  summarise(
    Total_Trees   = n(),
    Dead_Trees    = sum(tpcondition == "Dead", na.rm = TRUE),
    Dead_Fraction = Dead_Trees / Total_Trees,
    .groups = "drop"
  ) %>%
  arrange(desc(Dead_Trees))

dead_fraction_clean <- dead_fraction %>%
  filter(!is.na(CounDist)) %>%  # remove NA
  mutate(highlight = ifelse(CounDist == "32", "District 32", "Other Districts"))

ggplot(dead_fraction_clean, aes(x = reorder(CounDist, Dead_Fraction), y = Dead_Fraction, fill = highlight)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = c("District 32" = "red", "Other Districts" = "grey80")) +
  labs(
    title = "Dead Tree Share Across All NYC Districts",
    x = "Council District",
    y = "Dead Tree Share",
    fill = ""
  ) +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 6) 
  )

As seen below is a detailed table of NYC District’s with the 5 highest dead trees share. With District 32 having 4309 dead trees out of 30290 trees, making it 14% of it’s total tree population.

Code
# Summarize dead trees per district
dead_fraction <- trees_districts %>%
  st_set_geometry(NULL) %>%                  
  mutate(CounDist = as.character(CounDist)) %>% 
  group_by(CounDist) %>%
  summarise(
    Total_Trees   = n(),
    Dead_Trees    = sum(tpcondition == "Dead", na.rm = TRUE),
    Dead_Fraction = Dead_Trees / Total_Trees,
    .groups = "drop"
  ) %>%
  arrange(desc(Dead_Fraction))  # sort by fraction, not total dead trees

# Top 5 districts by dead fraction
top_districts <- dead_fraction %>% slice_head(n = 5)

# Identify the top district for highlighting
top_district <- top_districts$CounDist[1]

# Create the table
top_districts %>%
  select(CounDist, Total_Trees, Dead_Trees, Dead_Fraction) %>%
  gt() %>%
  cols_label(
    CounDist  = "Council District",
    Total_Trees   = "Total Trees",
    Dead_Trees    = "Dead Trees",
    Dead_Fraction = "Dead Tree Share"
  ) %>%
  fmt_number(
    columns = Dead_Fraction,
    decimals = 2
  ) %>%
  tab_header(
    title = "NYC 5 Highest Council Districts by Dead Tree Share"
  ) %>%
  tab_style(
    style = cell_fill(color = "lightpink"),
    locations = cells_body(
      rows = CounDist == top_district
    )
  )
NYC 5 Highest Council Districts by Dead Tree Share
Council District Total Trees Dead Trees Dead Tree Share
32 30290 4309 0.14
30 23000 3227 0.14
2 11549 1571 0.14
50 52438 7041 0.13
29 19960 2678 0.13

This is a zoomed-in map of District 32 highlighting the dead trees, we will start our proposed work to target for replacement.

Code
# --- Filter District 32 and dead trees ---
dead_d32  <- trees_districts %>% 
  filter(CounDist == "32" & tpcondition == "Dead")
district32 <- districts %>% filter(CounDist == 32)

# --- Bounding box for zoom ---
bbox32 <- st_bbox(district32)
xlim <- c(bbox32["xmin"] - 0.001, bbox32["xmax"] + 0.001)
ylim <- c(bbox32["ymin"] - 0.001, bbox32["ymax"] + 0.001)

# --- Plot ---
ggplot() +
  geom_sf(data = district32, fill = NA, color = "grey60", linewidth = 0.5) +   # District outline
  geom_sf(data = dead_d32, color = "darkred", alpha = 0.6, size = 0.12) +     # All dead trees
  coord_sf(xlim = xlim, ylim = ylim, expand = FALSE) +
  labs(
    title = "Map of Dead Trees in NYC District 32"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.major = element_line(linewidth = 0.2, colour = "grey90"),
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )

Conclusion

This initiative is part of a broader effort to improve Rockaway’s public spaces, making it safer, greener, and more attractive year-round. By addressing the urgent need for tree replacement in District 32, the project supports both environmental resilience and community well-being.